Spectra Story

Author

Adam Kemberling

Published

October 3, 2024

Code
# Libraries
library(tidyverse)
library(gmRi)
library(rnaturalearth)
library(scales)
library(sf)
library(gt)
library(patchwork)
library(lmerTest)
library(emmeans)
library(merTools)
library(tidyquant)
library(ggeffects)
library(performance)
library(gtsummary)
library(gt)
conflicted::conflict_prefer("select", "dplyr")
conflicted::conflict_prefer("filter", "dplyr")
conflicted::conflicts_prefer(lmerTest::lmer)

# ggplot theme
theme_set(
  theme_gmri(
    rect = element_rect(fill = "white", color = NA), 
    strip.text.y = element_text(angle  = 0),
    axis.text.x = element_text(size = 8),
    legend.position = "bottom"))

# vectors for factor levels
area_levels <- c("GoM", "GB", "SNE", "MAB")
area_levels_long <- c("Gulf of Maine", "Georges Bank", "Southern New England", "Mid-Atlantic Bight")

# Degree symbol
deg_c <- "\u00b0C"

Storycrafting - Northeast Shelf Spectra

I think we need to step back a bit and find the main story again–which is about how size structure of the NES fish community has changed over time, whether that varies by region, and what covariates are associated with those changes.
The Aug draft you developed really focused on comparing regression models to consider drivers based on the fish community size spectrum as viewed from the perspective of the Wigley species set vs. the full species set.
However, I didn’t see figures of those spectra in that draft (maybe there are somewhere else and I’ve lost track). The recent slide deck really focuses on seasonal differences in the size spectra and influence of temperature. - KMills

Starting from Square Uno

I’d like for us to start by rebuilding the story of how the size structure has changed over time and regional differences.
As such, I would like for us to look at plots of the size spectrum slope for (1) the all-species length spectrum and (2) the Wigley species biomass spectrum from annual, seasonal and regional perspectives.
I am thinking this would be two panel plots (all-spp length, Wigley-spp biomass) with 5 subpanels each (NES, GOM, GB, SNE, MAB) that are set up to show lines by seasonal and annual time steps, much like the plot on the right of slide 7 in your most recent slide deck…except with NES added and an annual line added. Keeping the significant trend lines in is helpful. This is the first step, so when you have that ready, please share it with the group. - KMills

Code
# Data used for Wigley estimates
wigley_in <- read_csv(here::here("Data/processed/wigley_species_trawl_data.csv"))
finfish_in <- read_csv(here::here("Data/processed/finfish_trawl_data.csv"))
Code
# Load the three datasets
wigley_lenspectra <- read_csv(
  here::here("Data/processed/wigley_species_length_spectra.csv"))  %>% 
  mutate(survey_area = factor(survey_area, levels = area_levels))
wigley_bmspectra <- read_csv(
  here::here("Data/processed/wigley_species_bodymass_spectra.csv")) %>% 
  mutate(survey_area = factor(survey_area, levels = area_levels))
finfish_lenspectra <- read_csv(
  here::here("Data/processed/finfish_length_spectra.csv")) %>% 
  mutate(survey_area = factor(survey_area, levels = area_levels))



# Join them together
spectra_results <- bind_rows(
   list(
     "length_spectra" = wigley_lenspectra,
    "bodymass_spectra" =  wigley_bmspectra), 
   .id = "metric") %>% 
  mutate(community = "Wigley Subset") %>% 
  bind_rows(mutate(
    finfish_lenspectra,
    metric = "length_spectra",
    community = "Finfish Community")) %>% 
  mutate(
    survey_area = fct_relevel(survey_area, area_levels),
    metric = fct_rev(metric))



# Left side:
# All species  species length spectrum 
# color = season vs. annual
# facet_wrap(~survey_area)

# Right side:
# Wigley species biomass spectrum
# color = season vs. annual
# facet_wrap(~survey_area)


# Model significant trends separately
# Year as RE
lspectra_mod_ffish <- lmer(
  formula = b ~ est_year * survey_area * season + (1|est_year),
  data = finfish_lenspectra)
lspectra_mod_wig <- lmer(
  formula = b ~ est_year * survey_area * season + (1|est_year),
  data = wigley_lenspectra)
bmspectra_mod_wig <- lmer(
  formula = b ~ est_year * survey_area * season + (1|est_year),
  data = wigley_bmspectra)



# Function to get the Predictions
# Drop effect fits that are non-significant  ###
get_preds_and_trendsignif <- function(mod_x){
  modx_preds <- as.data.frame(
    # Model predictions
    ggpredict(
      mod_x, 
      terms = ~ est_year * survey_area * season) ) %>% 
    mutate(
      survey_area = factor(group, levels = area_levels),
      season = factor(facet, levels = c("Spring", "Fall")))
  
    # Just survey area and year
    modx_emtrend <- emtrends(
      object = mod_x,
      specs =  ~ survey_area*season,
      var = "est_year") %>% 
      as_tibble() %>% 
      mutate(
        zero = 0,
        non_zero = if_else(between(zero, lower.CL, upper.CL), F, T))
  
    # Preds with signif
    modx_preds %>% left_join(select(modx_emtrend, survey_area, season, non_zero))
  
}



# Get the predictions and flag whether they are significant
trend_predictions <- bind_rows(list(
  "length_spectra-Finfish Community" = get_preds_and_trendsignif(lspectra_mod_ffish),
  "length_spectra-Wigley Subset" = get_preds_and_trendsignif(lspectra_mod_wig),
  "bodymass_spectra-Wigley Subset" = get_preds_and_trendsignif(bmspectra_mod_wig)
  ), .id = "model_id") %>% 
  separate(model_id, into = c("metric", "community"), sep = "-") %>% 
  mutate(metric = fct_rev(metric))





# Plot over observed data
# Contrast seasonal differences




# Make the plot
spectra_trends_1g <- ggplot() +
   geom_ribbon(
    data = filter(trend_predictions, non_zero == TRUE),
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +
  geom_point(
    data = spectra_results,
    aes(est_year, b, color = season),
    size = 0.8, alpha = 0.5) +
  geom_line(
    data = filter(trend_predictions, non_zero == TRUE),
    aes(x, predicted, color = season),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
  facet_grid(survey_area ~ metric*community, scales = "free") +
  labs(
    x = "Year",
    y = "Exponent of Size Spectra")
 

spectra_trends_1g

Model Summary Tables

Code
# Summary tables
lspectra_mod_ffish %>% 
  gtsummary::tbl_regression()   %>% 
  add_global_p(keep = T) %>% 
  bold_p(t = 0.05)  %>%
  bold_labels() %>%
  italicize_levels() %>% 
  as_gt() %>% 
  tab_header(title = "Full Finfish Community - Length Spectra Trends")
Full Finfish Community - Length Spectra Trends

Characteristic

Beta

95% CI

1

p-value

est_year 0.00 0.00, 0.00 0.2
survey_area

0.005
    GoM
    GB 1.4 -5.5, 8.4 0.7
    SNE 6.8 -0.24, 14 0.058
    MAB -6.1 -13, 0.91 0.087
season

0.2
    Fall
    Spring 4.1 -2.9, 11 0.2
est_year * survey_area

0.005
    est_year * GB 0.00 0.00, 0.00 0.6
    est_year * SNE 0.00 -0.01, 0.00 0.040
    est_year * MAB 0.00 0.00, 0.01 0.12
est_year * season

0.2
    est_year * Spring 0.00 -0.01, 0.00 0.2
survey_area * season

0.7
    GB * Spring -1.2 -11, 8.7 0.8
    SNE * Spring -5.2 -15, 4.7 0.3
    MAB * Spring -4.5 -14, 5.5 0.4
est_year * survey_area * season

0.6
    est_year * GB * Spring 0.00 0.00, 0.01 0.8
    est_year * SNE * Spring 0.00 0.00, 0.01 0.3
    est_year * MAB * Spring 0.00 0.00, 0.01 0.3
1

CI = Confidence Interval

Code
# Summary tables
lspectra_mod_wig %>% 
  gtsummary::tbl_regression()   %>% 
  add_global_p(keep = T) %>% 
  bold_p(t = 0.05)  %>%
  bold_labels() %>%
  italicize_levels() %>% 
  as_gt() %>% 
  tab_header(title = "Wigley Community - Length Spectra Trends")
Wigley Community - Length Spectra Trends

Characteristic

Beta

95% CI

1

p-value

est_year 0.00 -0.01, 0.00 0.030
survey_area

0.3
    GoM
    GB -3.5 -10, 3.6 0.3
    SNE -6.4 -14, 0.69 0.077
    MAB -5.9 -13, 1.2 0.10
season

0.4
    Fall
    Spring 2.8 -4.2, 9.8 0.4
est_year * survey_area

0.3
    est_year * GB 0.00 0.00, 0.01 0.3
    est_year * SNE 0.00 0.00, 0.01 0.094
    est_year * MAB 0.00 0.00, 0.01 0.13
est_year * season

0.4
    est_year * Spring 0.00 0.00, 0.00 0.4
survey_area * season

0.002
    GB * Spring 3.7 -6.3, 14 0.5
    SNE * Spring 6.8 -3.2, 17 0.2
    MAB * Spring -12 -22, -1.6 0.023
est_year * survey_area * season

0.002
    est_year * GB * Spring 0.00 -0.01, 0.00 0.5
    est_year * SNE * Spring 0.00 -0.01, 0.00 0.2
    est_year * MAB * Spring 0.01 0.00, 0.01 0.018
1

CI = Confidence Interval

Code
# Summary tables
bmspectra_mod_wig %>% 
  gtsummary::tbl_regression()   %>% 
  add_global_p(keep = T) %>% 
  bold_p(t = 0.05)  %>%
  bold_labels() %>%
  italicize_levels() %>% 
  as_gt() %>% 
  tab_header(title = "Wigley Community - Mass Spectra Trends")
Wigley Community - Mass Spectra Trends

Characteristic

Beta

95% CI

1

p-value

est_year 0.00 0.00, 0.00 0.020
survey_area

0.3
    GoM
    GB -2.8 -6.2, 0.71 0.12
    SNE -2.6 -6.1, 0.93 0.15
    MAB -2.6 -6.1, 0.93 0.15
season

0.5
    Fall
    Spring 1.2 -2.3, 4.7 0.5
est_year * survey_area

0.4
    est_year * GB 0.00 0.00, 0.00 0.12
    est_year * SNE 0.00 0.00, 0.00 0.2
    est_year * MAB 0.00 0.00, 0.00 0.2
est_year * season

0.5
    est_year * Spring 0.00 0.00, 0.00 0.5
survey_area * season

<0.001
    GB * Spring 1.7 -3.2, 6.6 0.5
    SNE * Spring 1.3 -3.6, 6.2 0.6
    MAB * Spring -7.3 -12, -2.4 0.004
est_year * survey_area * season

<0.001
    est_year * GB * Spring 0.00 0.00, 0.00 0.5
    est_year * SNE * Spring 0.00 0.00, 0.00 0.7
    est_year * MAB * Spring 0.00 0.00, 0.01 0.003
1

CI = Confidence Interval

I also think it would be valuable to build out the story of size change further, so am thinking that plots like those of average (or median) length (all species) and weight (wigley species) for NES and the sub-regions, like those on p. 12 of the attached file above (the very first draft from late 2022) would be useful. - KMills

Code
# Load the median weight/length data
wigley_size_df <- read_csv(here::here("Data/processed/wigley_species_size_summary.csv"))
finfish_size_df <- read_csv(here::here("Data/processed/finfish_species_length_summary.csv"))


# Join them
size_results <- bind_rows(
  list(
    "Finfish Community" = finfish_size_df,
    "Wigley Subset" = wigley_size_df), 
  .id = "community"
)




# Get trends:
len_mod_ffish <- lmer(
  formula = med_len_cm ~ est_year * survey_area * season + (1|est_year),
  data = finfish_size_df)
len_mod_wig <- lmer(
  formula = med_len_cm ~ est_year * survey_area * season + (1|est_year),
  data = wigley_size_df)
wt_mod_wig <- lmer(
  formula = med_wt_kg ~ est_year * survey_area * season + (1|est_year),
  data = drop_na(wigley_size_df, med_wt_kg))



# Get the predictions and flag whether they are significant
size_trend_predictions <- bind_rows(list(
  "med_len_cm-Finfish Community" = get_preds_and_trendsignif(len_mod_ffish),
  "med_len_cm-Wigley Subset"     = get_preds_and_trendsignif(len_mod_wig),
  "med_wt_kg-Wigley Subset"   = get_preds_and_trendsignif(wt_mod_wig)
  ), .id = "model_id") %>% 
  separate(model_id, into = c("metric", "community"), sep = "-") %>% 
  mutate(metric = fct_rev(metric))






# Left side: 
# Median length - all species
# color = season vs. annual
# facet_wrap(~survey_area)


size_long <- size_results %>% 
  pivot_longer(cols = c(med_len_cm, med_wt_kg), 
               names_to = "metric", 
               values_to = "val")  %>% 
  mutate(survey_area = fct_relevel(survey_area, area_levels),
         season = fct_rev(season))


# just length for scales
len_plot <-  size_long %>% 
  filter(metric == "med_len_cm") %>% 
  drop_na(val) %>% 
  ggplot() +
  geom_ribbon(
    data = filter(size_trend_predictions, metric == "med_len_cm", non_zero == TRUE),
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +
  geom_point(
    #data = filter(size_long, metric == "med_len_cm"),
    aes(est_year, val, color = season),
    size = 0.8, alpha = 0.5) +
  geom_line(
    data = filter(size_trend_predictions, metric == "med_len_cm", non_zero == TRUE),
    aes(x, predicted, color = season),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
  facet_grid(survey_area ~ metric*community,
             scales = "free") +
  theme(strip.text.y = element_blank()) +
  labs(
    x = "Year",
    y = "Length (cm)")

# Right: 
# Median weight - wigley species
# color = season vs. annual
# facet_wrap(~survey_area)

# weight plots
wt_plot <- size_long %>% 
  filter(metric == "med_wt_kg") %>% 
  drop_na(val) %>% 
  ggplot() +
  geom_ribbon(
    data = filter(size_trend_predictions, metric == "med_wt_kg", non_zero == TRUE),
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +
  geom_point(
    #data = filter(size_long, metric == "med_len_cm"),
    aes(est_year, val, color = season),
    size = 0.8, alpha = 0.5) +
  geom_line(
    data = filter(size_trend_predictions, metric == "med_wt_kg", non_zero == TRUE),
    aes(x, predicted, color = season),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
  facet_grid(survey_area ~ metric*community,
             scales = "free")  +
  labs(
    x = "Year",
    y = "Weight (kg)")

(len_plot | wt_plot) + plot_layout(guides = "collect", widths = c(3,1.5)) & theme(legend.position = "bottom")

A couple things have started to catch my eye the more I’ve thought about volatility in these numbers.

There seem to be two situations happening on occassion. The first is more common in GOM+GB, and that is a 5-10cm drop in median length. This sudden drop in size I am imagining is likely due to surges in new recruits.

The other patttern which is more common in MAB but looks like it happens a handful of times in GB is the reverse situation, where median size surges upward in isolated years. For these situations I think we likely could trace these down to exceptionally high catches of larger species like elasmobranchs.

Relationships to Temperature and Landings

The following plots and summaries will relate the Wigley species spectra to Temperature and Live Landings

Code
# data limited community
ffish_medlen_model_df <- read_csv(here::here("Data/model_ready/large_community_medlength_mod.csv"))
ffish_lenspectra_model_df <- read_csv(here::here("Data/model_ready/large_community_lenspectra_mod.csv"))

# well studied community
wigley_medlen_model_df <- read_csv(here::here("Data/model_ready/wigley_community_medsize_mod.csv"))
wigley_lenspectra_model_df <- read_csv(here::here("Data/model_ready/wigley_community_lenspectra_mod.csv"))
wigley_bmspectra_model_df <- read_csv(here::here("Data/model_ready/wigley_community_bmspectra_mod.csv"))

# Use one for plots
wigley_bmspectra_model_df <- wigley_bmspectra_model_df %>% 
  mutate(survey_area = case_when(
    survey_area == "GoM" ~ "Gulf of Maine",
    survey_area == "GB" ~ "Georges Bank",
    survey_area == "SNE" ~ "Southern New England",
    survey_area == "MAB" ~ "Mid-Atlantic Bight"),
  survey_area = factor(survey_area, levels = area_levels_long),
  season = fct_rev(season))

Bottom Temperature Changes

Code
wigley_bmspectra_model_df %>% 
  ggplot(aes(est_year, bot_temp, color = season)) +
  geom_point(size = 0.8, alpha = 0.5) +
  geom_ma(size = 1, n = 5, ma_fun = SMA, linetype = 1) +
  scale_color_gmri() + 
  facet_wrap(~survey_area, ncol = 1) +
  scale_y_continuous(labels = label_number(suffix = "\u00b0C")) +
  labs(y = "Bottom Temperature", x = "Year", color = "5-Year Smooth")

Total Landings

Code
wigley_bmspectra_model_df %>% 
  distinct(est_year, survey_area, total_weight_lb) %>% 
  ggplot(aes(est_year, total_weight_lb/1e6)) +
  geom_point(size = 0.8, alpha = 0.5) +
  geom_ma(size = 1, n = 5, ma_fun = SMA, linetype = 1) +
  facet_wrap(~survey_area, ncol = 1) +
  scale_y_continuous(labels = label_number(suffix = "M")) +
  labs(y = "Total Landings (live lb.)", x = "Year")


Spiny Dogfish Sensitivity

How much can one species shift the spectra, and is this a definitive/conclusive proof that its the dogfish shallowing the slope in MAB?

Code
# # Run MAB with and without spiny dogfish
# # Run the year, season, area fits for the filtered data
# no_dogs_bmspectra <- group_binspecies_spectra(
#   ss_input = filter(wigley_in, comname != "spiny dogfish"),
#   grouping_vars = c("est_year", "season", "survey_area"),
#   abundance_vals = "numlen_adj",
#   length_vals = "length_cm",
#   use_weight = TRUE,
#   isd_xmin = 1,
#   global_min = TRUE,
#   isd_xmax = NULL,
#   global_max = FALSE,
#   bin_width = 1,
#   vdiff = 2)

# # Save those
# write_csv(
#   no_dogs_bmspectra,
#   here::here("Data/processed/wigley_species_nodogfish_bmspectra.csv"))
Code
# Read them in, do some plotting
no_dogs_bmspectra <- read_csv(
  here::here("Data/processed/wigley_species_nodogfish_bmspectra.csv")) %>% 
  mutate(survey_area = factor(survey_area, levels = area_levels))


# Format a little
no_dogs_bmspectra <- no_dogs_bmspectra %>% 
  mutate(est_year = as.numeric(est_year))


# Join them together
dfish_results <- bind_rows(
   list(
     "Wigley Full" = wigley_bmspectra,
     "Wigley w/o Spiny Dogfish" =  no_dogs_bmspectra), 
   .id = "community") %>% 
  mutate(metric = "bodymass_spectra") %>% 
  mutate(
    survey_area = fct_relevel(survey_area, area_levels))




# Get trends for no dogfish
nodfish_mod <- lmer(
  formula = b ~ est_year * survey_area * season + (1|est_year),
  data = no_dogs_bmspectra)



dogfish_sens_predictions <- bind_rows(list(
  "bodymass_spectra-Wigley Full" = get_preds_and_trendsignif(bmspectra_mod_wig),
  "bodymass_spectra-Wigley w/o Spiny Dogfish" = get_preds_and_trendsignif(nodfish_mod)), 
  .id = "model_id") %>% 
  separate(model_id, into = c("metric", "community"), sep = "-") %>% 
  mutate(metric = fct_rev(metric))



# Plot the differences
ggplot() +
  geom_ribbon(
    data = filter(dogfish_sens_predictions, non_zero == TRUE),
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +
  geom_point(
    data = dfish_results,
    aes(est_year, b, color = season),
    size = 0.8, alpha = 0.5) +
  geom_line(
    data = filter(dogfish_sens_predictions, non_zero == TRUE),
    aes(x, predicted, color = season),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
  facet_grid(survey_area ~ metric*community, scales = "free") +
  labs(
    x = "Year",
    y = "Exponent of Size Spectra",
    title = "Shift in Spectra Trend(s) with Exclusion of Spiny Dogfish")

Large Fish Index

When I looked at large/small fish indices I looked at two metrics:

  • The large fish index (proportion of biomass total represented by fishes in top 5-10% of body sizes)
  • The large species index

We will focus on the former for the plots below. Fish size for the below plots will be based on length because these are measured for all individuals.

Large fish will be considered anything over 73cm, representing the 95th percentile. Values are the fraction of total bodymass for a given season that is coming from the large fishes.

The fraction of the biomass held in fishes larger than 73cm is lower in the two northern areas, and falling across both seasons. In SNE its declining in the spring, and in the MAB its increasing.

Code
##### Large Fish Index  ####


# Get 95th percentile as threshold
DescTools::Quantile(
  wigley_in$length_cm, 
  weights = wigley_in$numlen_adj, 
  probs = c(0.1, 0.5, 0.90))
10% 50% 90% 
 10  21  55 
Code
wigley_in <- wigley_in %>% 
  mutate(large = ifelse(length_cm > 73, T, F))



# Get total bio
total_bio <-  wigley_in %>% 
  group_by(survey_area, est_year, season) %>% 
  summarise(
    total_bio = sum(sum_weight_kg),
    .groups = "drop")
  
  
# Get large fish bio
lf_bio <- wigley_in %>% 
  group_by(survey_area, est_year, season) %>% 
  summarise(
    large_bio = sum(sum_weight_kg * large),
    .groups = "drop")

# Join and divide
lfi <- left_join(total_bio, lf_bio) %>% 
  mutate(LFI = large_bio / total_bio)  %>% 
  mutate(survey_area = factor(survey_area, levels = area_levels))

# trendz
# Get trends for no dogfish
lfi_mod <- lmer(
  formula = LFI ~ est_year * survey_area * season + (1|est_year),
  data = lfi)

lfi_predictions <- get_preds_and_trendsignif(lfi_mod) %>% 
  mutate(survey_area = factor(survey_area, levels = area_levels))


# Plot the LFI
ggplot() +
  geom_ribbon(
    data = filter(lfi_predictions, non_zero == TRUE),
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +

  geom_point(
    data = lfi, aes(est_year, LFI, color = season),
    size = 0.8, alpha = 0.5) +
    geom_line(
    data = filter(lfi_predictions, non_zero == TRUE),
    aes(x, predicted, color = season),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
  scale_y_continuous(limits = c(0,1)) +
  facet_grid(survey_area~., scales = "free") +
  labs(
    x = "Year",
    y = "Large Fish Index",
    title = "Large Fish Index",
    fill = "Season",
    color = "Season")

The following species are those which has representation in the large fish index:

Code
wigley_in %>% 
  filter(large) %>% 
  distinct(comname) %>% 
  arrange(comname) %>% 
  rename(`Common Name:` = comname) %>% 
  gt() %>%
  tab_header(title = "Large Fishes")
Large Fishes
Common Name:
american plaice
atlantic angel shark
atlantic cod
atlantic halibut
atlantic sharpnose shark
atlantic sturgeon
atlantic torpedo
atlantic wolffish
barndoor skate
bluefish
bluntnose stingray
bullnose ray
clearnose skate
cownose ray
cusk
goosefish
greater amberjack
haddock
ocean pout
pollock
roughtail stingray
sand tiger
sandbar shark
silver hake
smooth butterfly ray
smooth dogfish
southern stingray
spiny butterfly ray
spiny dogfish
spotted hake
striped bass
summer flounder
thorny skate
weakfish
white hake
winter flounder
winter skate

Story Thoughts

Below is a table of some papers operating in the same area or around the same topic that are particularly relevant:

Code
shelf_papers <- tribble(
  ~"Author", ~"Year", ~"Conjecture",
  "Friedland et al.", 2024, "Body size has declined despite more abundance & biomass. Competition is preventing larger sizes.",
  "Krumsick", 2020, "Empirical slopes from trawl survey in labrador sea were steeper than theoretical from nitrogen stable isotope ratios"
)

Methodology Hangups

I do not have confidence that the values we have for slopes presently:

  • Represent the actual distribution of the data (mis-application of an assumed distribution)
  • Are representative of some meaningful feature of the community (the slope from the above mis-application has no meaning)

I’ve changed it about fifty times now, and I am concerned that people are now convinced of a story despite these values not meaning what we are assuming they mean.

Thoughts on Min/Max Parameters

Our hypotheses both relate primarily to abundance of non-age0 fishes. It might make sense to move the minimum size up to something larger. This will accomplish two things: 1) lower any noise in median size or spectra slope that results from large recruit cohorts & 2) help ensure that the size range we’re estimating spectra for are fully selected by the gear.

Currently a 1g minimum size is likely lower than the mesh size selectivity.

This section from Krumsick 2024 describes how this has been handled previously > To account for reduced catchability of small fish,past size- spectrum studies have typically only used size frequency data for size classes larger than the peak size class in observed data (i.e. the descending slope of the observed size-abundance re- lationship) when estimating the slope of sizespectra (e.g.Rice and Gislason 1996, Daan et al. 2005, Krumsick and Fisher 2020).

The following table is a short collection of how authors who cited edwards treated this step:

Code
# Table of papers using mle method and their parameter choices:
lme_params <- tribble(
  ~"Author", ~"Year", ~"Data Used", ~"method", ~"xmin", ~"xmax",
  "Wood", 2024, "Hook and line fisheries dependent reef fishes from fishermen interviews", "log10 bins of 15mm", "141mm", "320mm",
  
  "Letessier", 2024, "Baited Remote Underwater Video Systems BRUVS", "Two scales: normalized spectra w/ log10 bins for latitudinal brackets, MLE method for individual size distribution for each survey day" , "not reported", "not reported",
  
  "Rice and Gislason", 1996, "Trawl survey of North Sea Fish + multi-species virtual pop. analysis", "ln
abundance within a size interval (10cm) to ln size intervals", "not stated", "not stated",
  
  "Daan", 2005, "North Sea fisheries independent trawl surveys", "ln(abundance cpue) within bins ~ 5 & 10cm bins", 
  "After inspection of the size range
conforming to a ln-linear slope: ", "size classes at
the lower (poor retention in the gear) and upper (captures
too infrequent for robust estimates) end of the range were
excluded",

  "Krumsick", 2020, "Labrador canada fisheries independent trawl", "...followed recommen-
dations provided by Edwards et al. (2017), although
the prescribed maximum likelihood estimate approach
departed from the empirical data (Fig. S3)... then binned", "64g", "not stated",

"Blanchard", 2009, "North sea predator and detritvore log(abundance/m2) ~ log(bodymass) slopes", "log10 body mass bins", "predators: 256g", "predators: 4kg"

)


lme_params %>% gt()
Author Year Data Used method xmin xmax
Wood 2024 Hook and line fisheries dependent reef fishes from fishermen interviews log10 bins of 15mm 141mm 320mm
Letessier 2024 Baited Remote Underwater Video Systems BRUVS Two scales: normalized spectra w/ log10 bins for latitudinal brackets, MLE method for individual size distribution for each survey day not reported not reported
Rice and Gislason 1996 Trawl survey of North Sea Fish + multi-species virtual pop. analysis ln abundance within a size interval (10cm) to ln size intervals not stated not stated
Daan 2005 North Sea fisheries independent trawl surveys ln(abundance cpue) within bins ~ 5 & 10cm bins After inspection of the size range conforming to a ln-linear slope: size classes at the lower (poor retention in the gear) and upper (captures too infrequent for robust estimates) end of the range were excluded
Krumsick 2020 Labrador canada fisheries independent trawl ...followed recommen- dations provided by Edwards et al. (2017), although the prescribed maximum likelihood estimate approach departed from the empirical data (Fig. S3)... then binned 64g not stated
Blanchard 2009 North sea predator and detritvore log(abundance/m2) ~ log(bodymass) slopes log10 body mass bins predators: 256g predators: 4kg
Code
# Broad Distribution
#log2 bins for easy-of-access
#' @title Build Log 2 Bin Structure Dataframe
#' 
#' @description Used to build a dataframe containing equally spaced log2 bins for
#' size spectra analysis. Contains details on the left and right limits, midpoint, bin width, 
#' and a text label for the bins. log2bin number ascends with increasing size for eeasy plotting.
#'
#' @param log2_min 
#' @param log2_max 
#' @param log2_increment 
#'
#' @return
#' @export
#'
#' @examples
define_log2_bins <- function(log2_min = 0, log2_max = 13, log2_increment = 1){
  
  # How many bins
  n_bins  <- length(seq(log2_max, log2_min + log2_increment, by = -log2_increment))
  
  # Build Equally spaced log2 bin df
  log2_bin_structure <- data.frame(
    "log2_bins" = as.character(seq(n_bins, 1, by = -1)),
    "left_lim"  = seq(log2_max - log2_increment, log2_min, by = -log2_increment),
    "right_lim" = seq(log2_max, log2_min + log2_increment, by = -log2_increment)) %>% 
    mutate(
      bin_label    = str_c(round(2^left_lim, 3), " - ", round(2^right_lim, 3), "g"),
      bin_width    = 2^right_lim - 2^left_lim,
      bin_midpoint = (2^right_lim + 2^left_lim) / 2) %>% 
    arrange(left_lim)
  
  return(log2_bin_structure)
}
Code
#' @title Assign Manual log2 Bodymass Bins - By weight
#'
#' @description Manually assign log2 bins based on individual length-weight bodymass 
#' in increments of 1 on the log2 scale. Returns data with bins assigned based on individual
#' length-weight biomass
#' 
#' Uses maximum weight, and its corresponding bin as the limit.
#'
#' @param wmin_grams Catch data prepared for mle calculation, use prep_wmin_wmax
#' @param log2_increment Equally spaced increments to use for log 2 bin sizes. Default = 0.5.
#'
#' @return
#' @export
#'
#' @examples
assign_log2_bins <- function(wmin_grams, log2_increment = 1){
  
  
  #### 1. Set up bodymass bins
  
  # filter missing weights
  size_data <- wmin_grams %>% 
    filter(wmin_g > 0,
           is.na(wmin_g) == FALSE,
           wmax_g > 0,
           is.na(wmax_g) == FALSE)
  
  # Get bodymass on log2() scale
  size_data$log2_weight <- log2(size_data$ind_weight_g)
  
  # Set up the bins - Pull min and max weights from data available
  #min_bin <- floor(min(size_data$log2_weight))
  min_bin <- 0
  max_bin <- ceiling(max(size_data$log2_weight))
  
  
  # Build a bin key, could be used to clean up the incremental assignment or for apply style functions
  log2_bin_structure <- define_log2_bins(
    log2_min = min_bin, 
    log2_max = max_bin, 
    log2_increment = log2_increment)
  
  
  
  # Loop through bins to assign the bin details to the data
  log2_assigned <- log2_bin_structure %>%
    split(.$log2_bins) %>%
    map_dfr(function(log2_bin){
      
      # limits and labels
      l_lim   <- log2_bin$left_lim
      r_lim   <- log2_bin$right_lim
      bin_num <- as.character(log2_bin$log2_bin)
      
      # assign the label to the appropriate bodymasses
      size_data %>% mutate(
        log2_bins = ifelse( between(log2_weight, l_lim, r_lim), bin_num, NA),
        log2_bins = as.character(log2_bins)) %>%
        drop_na(log2_bins)
      
    })
  
  # Join in the size bins
  log2_assigned <- left_join(log2_assigned, log2_bin_structure, by = "log2_bins")
  
  # return the data with the bins assigned
  return(log2_assigned)
  
}
Code
#' @title Calculate Normalized and De-Normalized Abundances
#'
#' @description For binned size spectra estimation we use the stratified abundance divided by the
#' bin widths (normalized size spectra). Another way to present the data is to de-normalize, which 
#' takes those values and multiplies them by the mid-point of the log-scale bins.
#' 
#' min/max & bin_increments are used to enforce the presence of a size bin in the event that 
#' there is no abundance. This is done for comparing across different groups/areas that should 
#' conceivably have the same size range sampled.
#'
#' @param log2_assigned size data containing the bin assignments to use
#' @param min_log2_bin Minimum 2^x value for the size spectra being measured (>=)
#' @param max_log2_bin Maximum 2^x value for the size spectra being measured (<)
#' @param bin_increment The bin-width on log scale that separates each bin
#' @param ... Additional grouping factors with which to aggregate on besides the size bins themselves
#'
#' @return
#' @export
#'
#' @examples
aggregate_log2_bins <- function(
    log2_assigned, 
    min_log2_bin = 0, 
    max_log2_bin = 13, 
    bin_increment = 1,
    ...){
  
  # Full Possible Bin Structure
  # Fills in any gaps
  log2_bin_structure <- define_log2_bins(
    log2_min       = min_log2_bin, 
    log2_max       = max_log2_bin, 
    log2_increment = bin_increment)
  
  
  # Capture all the group levels with a cheeky expand()
  if(missing(...) == FALSE){
    log2_bin_structure <- log2_bin_structure %>% 
      tidyr::expand(left_lim, distinct(log2_assigned, ...)) %>% 
      full_join(log2_bin_structure)
  }
  
  
  
  # Get bin breaks
  log2_breaks <- sort(
    unique(c(log2_bin_structure$left_lim, log2_bin_structure$right_lim)))
  
  
  # Get Totals for bodymass and abundances
  log2_aggregates <- log2_assigned %>% 
    group_by(left_lim, ...) %>% 
    summarise(abundance   = sum(numlen_adj, na.rm = T),
              weight_g    = sum(wmin_g, na.rm = T),
              .groups = "drop")
  
  
  # Join back in what the limits and labels are
  # The defined bins and their labels enforce the size limits
  log2_prepped <- left_join(
    x = log2_bin_structure, 
    y = log2_aggregates)
  
  
  #### Fill Gaps with Zero's?? 
  # This ensures that any size bin that is intended to be in use is actually used
  log2_prepped <- log2_prepped %>% 
    mutate(
      abundance = ifelse(is.na(abundance), 1, abundance),
      weight_g = ifelse(is.na(weight_g), 1, weight_g))
  
  
  #### normalize abundances using the bin widths
  log2_prepped <- log2_prepped %>% 
    mutate(
      normalized_abund = abundance / bin_width,
      normalized_biom = weight_g / bin_width,
      # # Remove Bins Where Normalized Biomass < 0? No!
      # normalized_abund = ifelse(normalized_abund < 2^0, NA, normalized_abund),
      # norm_strat_abund = ifelse(norm_strat_abund < 2^0, NA, norm_strat_abund)
    )
  
  # Add de-normalized abundances (abundance * bin midpoint)
  log2_prepped <- log2_prepped %>% 
    mutate(
      denorm_abund = normalized_abund * bin_midpoint,
      denorm_biom = normalized_biom * bin_midpoint)
  
  # Return the aggregations
  return(log2_prepped)
  
}

The following plots shows what the distribution looks like for all data (years, seasons, areas) over the range of sizes we are allowing to contribute to the estimation of seasonal spectra. I’ve highlighted where a minimum body size of 1g sits on this curve to mark where it sits.

Code
# Assign l2 bins
wigley_log2 <- assign_log2_bins(wigley_in, log2_increment = 1)

# Aggregate on year, area, season
wigley_all_aggs <- wigley_log2 %>% 
  aggregate_log2_bins(
    .,
    min_log2_bin = 0, 
    max_log2_bin = 10, 
    bin_increment = 1)

# Plot
ggplot(wigley_all_aggs, aes(2^left_lim, normalized_abund)) +
  geom_col() +
  geom_vline(xintercept = 1, linetype = 2, color = "red", linewidth = 1) +
  geom_label(
      data = data.frame(val = 1, label = "Current\nMinimum\nSize\nCutoff:\n1g"), 
      aes(x = val, y = I(0.5), label = label), 
      color = "red", label.size = 1,
      label.padding = unit(0.7, "lines"), label.r = unit(0.5, "lines")) +
  geom_vline(xintercept = 2^4, linetype = 2, color = "royalblue", linewidth = 1) +
  geom_label(
      data = data.frame(val = 2^4, label = "More\nLikely\nFully\nSelected:\n16g"), 
      aes(x = val, y = I(0.5), label = label), 
      color = "royalblue", label.size = 1,
      label.padding = unit(0.7, "lines"), label.r = unit(0.5, "lines")) +
  scale_x_continuous(labels = label_log(base = 2), transform = "log2", breaks = 2^c(0:12)) +
  scale_y_continuous(labels = label_log(base = 2), transform = "log2") +
  labs(
    x = "Individual Body Weight (g)",
    y = "Normalized Abundance\n(total abundance / bin width)",
  title = "All-year, all-season size spectrum for Wigley Species Subset")

I picked 16g here basically eyeballing things, but my point is mostly that I think there is a need and an advantage to shifting the minimum size. We could probably be more scientific and look at the mesh size used (codend liner 1 inch stretched diamond mesh), but I think precision there is less important that being closer than we are now.

In all regions it seems like the typical linear slope begins at a larger size than 1g.

Code
# Aggregate on year, area, season
wigley_season_aggs <- wigley_log2 %>% 
  mutate(group_var = str_c(survey_area, "_", season)) %>% 
  split(.$group_var) %>% 
  map_dfr(
    ~aggregate_log2_bins(
      .x,
      min_l10_bin = 0, 
      max_l10_bin = 10, 
      bin_increment = 1),
    .id = "group_var") %>% 
  separate(col = group_var, into = c("survey_area", "season")) %>% 
  mutate(season = fct_rev(season))
  

# Plot
ggplot(wigley_season_aggs, aes(2^left_lim, normalized_abund)) +
  geom_col(aes(fill = season),  position = "dodge", alpha  = 0.4) +
  geom_vline(xintercept = 1, linetype = 2, color = "red", linewidth = 1) +
  geom_label(
      data = data.frame(val = 1, label = "1g"),
      aes(x = val, y = I(0.5), label = label),
      color = "red", label.size = 0.5, size = 2,
      label.padding = unit(0.7, "lines"), label.r = unit(0.5, "lines")) +
  geom_vline(xintercept = 2^4, linetype = 1, color = "royalblue", linewidth = 1) +
  geom_label(
      data = data.frame(val = 2^4, label = "16g"),
      aes(x = val, y = I(0.5), label = label),
      color = "royalblue", label.size = 0.5, size = 2,
      label.padding = unit(0.7, "lines"), label.r = unit(0.5, "lines")) +
  scale_fill_gmri() +
  scale_x_continuous(labels = label_log(base = 2), transform = "log2", breaks = 2^c(0:12)) +
  scale_y_continuous(labels = label_log(base = 2), transform = "log2") +
  facet_wrap(~survey_area, ncol = 1, scales = "fixed") +
  labs(
    x = "Individual Body Weight (g)",
    y = "Normalized Abundance\n(total abundance / bin width)",
    color = "Season",
    title = "All-year, Seasonal size spectrum for Wigley Species Subset")

If we take a year at random the distributions are even more patchy:

Code
# Aggregate on year, area, season
wigley_season_aggs <- wigley_log2 %>% 
  filter(est_year == 2000) %>% 
  mutate(group_var = str_c(survey_area, "_", season)) %>% 
  split(.$group_var) %>% 
  map_dfr(
    ~aggregate_log2_bins(
      .x,
      min_l10_bin = 0, 
      max_l10_bin = 10, 
      bin_increment = 1),
    .id = "group_var") %>% 
  separate(col = group_var, into = c("survey_area", "season")) %>% 
  mutate(season = fct_rev(season))
  

# Plot
ggplot(wigley_season_aggs, aes(2^left_lim, normalized_abund)) +
  geom_col(aes(fill = season),  position = "dodge", alpha  = 0.4) +
  geom_vline(xintercept = 1, linetype = 2, color = "red", linewidth = 1) +
  geom_label(
      data = data.frame(val = 1, label = "1g"),
      aes(x = val, y = I(0.5), label = label),
      color = "red", label.size = 0.5, size = 2,
      label.padding = unit(0.7, "lines"), label.r = unit(0.5, "lines")) +
  geom_vline(xintercept = 2^4, linetype = 1, color = "royalblue", linewidth = 1) +
  geom_label(
      data = data.frame(val = 2^4, label = "16g"),
      aes(x = val, y = I(0.5), label = label),
      color = "royalblue", label.size = 0.5, size = 2,
      label.padding = unit(0.7, "lines"), label.r = unit(0.5, "lines")) +
  scale_fill_gmri() +
  scale_x_continuous(labels = label_log(base = 2), transform = "log2", breaks = 2^c(0:12)) +
  scale_y_continuous(labels = label_log(base = 2), transform = "log2") +
  facet_wrap(~survey_area, ncol = 1, scales = "fixed") +
  labs(
    x = "Individual Body Weight (g)",
    y = "Normalized Abundance\n(total abundance / bin width)",
    color = "Season",
    title = "2000, Seasonal size spectrum for Wigley Species Subset")

There is a similar lack of tuning for maximum weight/length as well. For maximum size we presently take the maximum individual size in that area-year-season unit. While this also affects the distribution shape I am less concerned with this for now.

However. If we revert to the binning methods we may encounter issues where bins exist in some years and are NA in others.

Here is what the length distirbution looks like for the broader finfish community:

Length Distributions

Code
# Broad Distribution
#log2 bins for easy-of-access
#' @title Build Log 2 Bin Structure Dataframe
#' 
#' @description Used to build a dataframe containing equally spaced log2 bins for
#' size spectra analysis. Contains details on the left and right limits, midpoint, bin width, 
#' and a text label for the bins. log2bin number ascends with increasing size for eeasy plotting.
#'
#' @param log2_min 
#' @param log2_max 
#' @param log2_increment 
#'
#' @return
#' @export
#'
#' @examples
define_log2_bins <- function(log2_min = 0, log2_max = 13, log2_increment = 1){
  
  # How many bins
  n_bins  <- length(seq(log2_max, log2_min + log2_increment, by = -log2_increment))
  
  # Build Equally spaced log2 bin df
  log2_bin_structure <- data.frame(
    "log2_bins" = as.character(seq(n_bins, 1, by = -1)),
    "left_lim"  = seq(log2_max - log2_increment, log2_min, by = -log2_increment),
    "right_lim" = seq(log2_max, log2_min + log2_increment, by = -log2_increment)) %>% 
    mutate(
      bin_label    = str_c(round(2^left_lim, 3), " - ", round(2^right_lim, 3), "g"),
      bin_width    = 2^right_lim - 2^left_lim,
      bin_midpoint = (2^right_lim + 2^left_lim) / 2) %>% 
    arrange(left_lim)
  
  return(log2_bin_structure)
}
Code
#' @title Assign Manual log2 Bodymass Bins - By weight
#'
#' @description Manually assign log2 bins based on individual length-weight bodymass 
#' in increments of 1 on the log2 scale. Returns data with bins assigned based on individual
#' length-weight biomass
#' 
#' Uses maximum weight, and its corresponding bin as the limit.
#'
#' @param wmin_grams Catch data prepared for mle calculation, use prep_wmin_wmax
#' @param log2_increment Equally spaced increments to use for log 2 bin sizes. Default = 0.5.
#'
#' @return
#' @export
#'
#' @examples
assign_log2_len_bins <- function(
    size_increment_df, 
    metric_col = "length_cm",  
    log2_increment = 1){
  
  
  #### 1. Set up bodymass bins
  
  # filter missing weights
  size_data <- size_increment_df %>% 
    mutate(
      min_size = {{metric_col}},
      max_size = {{metric_col}}) %>% 
    filter(
      min_size > 0,
      is.na(min_size) == FALSE,
      max_size > 0,
      is.na(max_size) == FALSE)
  
  # Get bodymass on log2() scale
  size_data$log2_size <- log2(size_data[[metric_col]])
  
  # Set up the bins - Pull min and max weights from data available
  #min_bin <- floor(min(size_data$log2_weight))
  min_bin <- 0
  max_bin <- ceiling(max(size_data$log2_size))
  
  
  # Build a bin key, could be used to clean up the incremental assignment or for apply style functions
  log2_bin_structure <- define_log2_bins(
    log2_min = min_bin, 
    log2_max = max_bin, 
    log2_increment = log2_increment)
  
  
  
  # Loop through bins to assign the bin details to the data
  log2_assigned <- log2_bin_structure %>%
    split(.$log2_bins) %>%
    map_dfr(function(log2_bin){
      
      # limits and labels
      l_lim   <- log2_bin$left_lim
      r_lim   <- log2_bin$right_lim
      bin_num <- as.character(log2_bin$log2_bin)
      
      # assign the label to the appropriate bodymasses
      size_data %>% mutate(
        log2_bins = ifelse(between(log2_size, l_lim, r_lim), bin_num, NA),
        log2_bins = as.character(log2_bins)) %>%
        drop_na(log2_bins)
      
    })
  
  # Join in the size bins
  log2_assigned <- left_join(log2_assigned, log2_bin_structure, by = "log2_bins")
  
  # return the data with the bins assigned
  return(log2_assigned)
  
}



# # Test it
# finfish_log2 <- assign_log2_len_bins(size_increment_df = finfish_in, metric_col = "length_cm")
Code
#' @title Calculate Normalized and De-Normalized Abundances
#'
#' @description For binned size spectra estimation we use the stratified abundance divided by the
#' bin widths (normalized size spectra). Another way to present the data is to de-normalize, which 
#' takes those values and multiplies them by the mid-point of the log-scale bins.
#' 
#' min/max & bin_increments are used to enforce the presence of a size bin in the event that 
#' there is no abundance. This is done for comparing across different groups/areas that should 
#' conceivably have the same size range sampled.
#'
#' @param log2_assigned size data containing the bin assignments to use
#' @param min_log2_bin Minimum 2^x value for the size spectra being measured (>=)
#' @param max_log2_bin Maximum 2^x value for the size spectra being measured (<)
#' @param bin_increment The bin-width on log scale that separates each bin
#' @param ... Additional grouping factors with which to aggregate on besides the size bins themselves
#'
#' @return
#' @export
#'
#' @examples
aggregate_log2_len_bins <- function(
    log2_assigned, 
    min_log2_bin = 0, 
    max_log2_bin = 13, 
    bin_increment = 1,
    ...){
  
  # Full Possible Bin Structure
  # Fills in any gaps
  log2_bin_structure <- define_log2_bins(
    log2_min       = min_log2_bin, 
    log2_max       = max_log2_bin, 
    log2_increment = bin_increment)
  
  
  # Capture all the group levels with a cheeky expand()
  if(missing(...) == FALSE){
    log2_bin_structure <- log2_bin_structure %>% 
      tidyr::expand(left_lim, distinct(log2_assigned, ...)) %>% 
      full_join(log2_bin_structure)
  }
  
  
  
  # Get bin breaks
  log2_breaks <- sort(
    unique(c(log2_bin_structure$left_lim, log2_bin_structure$right_lim)))
  
  
  # Get Totals for bodymass and abundances
  log2_aggregates <- log2_assigned %>% 
    group_by(left_lim, ...) %>% 
    summarise(
      abundance   = sum(numlen_adj, na.rm = T),
      .groups = "drop")
  
  
  # Join back in what the limits and labels are
  # The defined bins and their labels enforce the size limits
  log2_prepped <- left_join(
    x = log2_bin_structure, 
    y = log2_aggregates)
  
  
  #### Fill Gaps with Zero's?? 
  # This ensures that any size bin that is intended to be in use is actually used
  log2_prepped <- log2_prepped %>% 
    mutate(
      abundance = ifelse(is.na(abundance), 1, abundance))
  
  
  #### normalize abundances using the bin widths
  log2_prepped <- log2_prepped %>% 
    mutate(
      normalized_abund = abundance / bin_width)
  
  # Add de-normalized abundances (abundance * bin midpoint)
  log2_prepped <- log2_prepped %>% 
    mutate(denorm_abund = normalized_abund * bin_midpoint)
  
  # Return the aggregations
  return(log2_prepped)
  
}
Code
# Gotta change the columns that the bins are built on
# Assign l2 bins
finfish_log2_lens <- assign_log2_len_bins(
  size_increment_df = finfish_in, 
  metric_col = "length_cm", 
  log2_increment = 1)

# Aggregate on year, area, season
finfish_season_len_aggs <- finfish_log2_lens %>% 
  mutate(group_var = str_c(survey_area, "_", season)) %>% 
  split(.$group_var) %>% 
  map_dfr(
    ~aggregate_log2_len_bins(
      .x,
      min_log2_bin = 0, 
      max_log2_bin = 10, 
      bin_increment = 1),
    .id = "group_var") %>% 
  separate(col = group_var, into = c("survey_area", "season")) %>% 
  mutate(season = fct_rev(season))
  

# Plot
ggplot(finfish_season_len_aggs, aes(2^left_lim, normalized_abund)) +
  geom_col(aes(fill = season),  position = "dodge", alpha  = 0.4) +
  geom_vline(xintercept = 1, linetype = 2, color = "red", linewidth = 1) +
  geom_label(
      data = data.frame(val = 1, label = "1cm"),
      aes(x = val, y = I(0.5), label = label),
      color = "red", label.size = 0.5, size = 2,
      label.padding = unit(0.7, "lines"), label.r = unit(0.5, "lines")) +
  scale_fill_gmri() +
  scale_x_continuous(labels = label_log(base = 2), transform = "log2", breaks = 2^c(0:12)) +
  scale_y_continuous(labels = label_log(base = 2), transform = "log2") +
  facet_wrap(~survey_area, ncol = 1, scales = "fixed") +
  labs(
    x = "Length (cm)",
    y = "Normalized Abundance\n(total abundance / bin width)",
    color = "Season",
    title = "Seasonal size spectrum for Wigley Species Subset")

Is it even Pareto?

This section from Edwards 2007 alludes to the ongoing debate on appropriateness of applying size spectra methods to empirical data.

The MLE method avoids binning and regression. Binning in general can be problematic (e.g. if a data set has no body masses <10 g but the lowest bin is defined as 8–16 g), and the choice of bin widths can affect the estimated slope (Vidondo et al. 1997). Regression-based methods are problematic because the intercept and the slope implicitly determine urn:x-wiley:2041210X:media:mee312641:mee312641-math-0064, which can erroneously be greater than some data values (James, Plank & Edwards 2011). They also assume that the errors in the logarithmic counts for each bin have the same variance, which may not be justified. Although regression can be understood in a likelihood context, this is different to explicitly using a likelihood-based method (Edwards et al. 2012).

What if we shifted minimum size to 16g?

I’m just going to go ahead and do it and see:

After doing some attempts to plot the impacts of moving it around, I am less convinced that its helping things and it may even be harming the situation.

In Krumsick et al. 2020 they determined visually that their data from the trawl data did not follow the distribution assumed by the MLE methodology and instead turned to normalized spectra using binned methods.

Code
# Load processing functions
#remotes::install_github("andrew-edwards/sizeSpectra")
library(sizeSpectra)
source(here::here("Code/R/Processing_Functions.R"))


# Run the year, season, area fits for the filtered data
wigley_bmspectra_16 <- group_binspecies_spectra(
  ss_input = filter(wigley_in, wmin_g >=16),
  grouping_vars = c("est_year", "season", "survey_area"),
  abundance_vals = "numlen_adj",
  length_vals = "length_cm",
  use_weight = TRUE,
  isd_xmin = 16,
  global_min = TRUE,
  isd_xmax = NULL,
  global_max = FALSE,
  bin_width = 1,
  vdiff = 2)

# # Save those
# write_csv(
#   wigley_bmspectra_16,
#   here::here("Data/processed/wigley_species_min16_bodymass_spectra.csv"))
Code
# Read them in, do some plotting
wigley_bmspectra_16 <- read_csv(
  here::here("Data/processed/wigley_species_min16_bodymass_spectra.csv")) %>% 
  mutate(survey_area = fct_relevel(survey_area, area_levels))
Code
# Model signigicant trends
bmspectra_16g_mod_wig <- lmer(
  formula = b ~ est_year * survey_area * season + (1|est_year),
  data = wigley_bmspectra_16)




# Get the predictions and flag whether they are significant
trend_predictions_16g <- get_preds_and_trendsignif(bmspectra_16g_mod_wig) %>% 
  mutate(model_id = "bodymass_spectra-Wigley Subset, 16g min") %>% 
  separate(model_id, into = c("metric", "community"), sep = "-") %>% 
  mutate(metric = fct_rev(metric),
         survey_area = fct_relevel(survey_area, area_levels))





# Plot significant trends over observed data

# Make the plot
 spectra_trends_16g <- ggplot() +
   geom_ribbon(
    data = filter(trend_predictions_16g, non_zero == TRUE),
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +
  geom_point(
    data = wigley_bmspectra_16,
    aes(est_year, b, color = season),
    size = 0.8, alpha = 0.5) +
  geom_line(
    data = filter(trend_predictions_16g, non_zero == TRUE),
    aes(x, predicted, color = season),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
   scale_y_continuous() +
  facet_grid(survey_area ~ metric*community) +
  labs(
    x = "Year",
    y = "Exponent of Size Spectra")
 
 
 
# Compare to previous
 # Make the plot
wigley_trends_1g <- ggplot() +
   geom_ribbon(
    data = filter(trend_predictions, 
                  metric == "bodymass_spectra",
                  non_zero == TRUE),
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +
  geom_point(
    data = filter(spectra_results, metric == "bodymass_spectra"),
    aes(est_year, b, color = season),
    size = 0.8, alpha = 0.5) +
  geom_line(
    data = filter(trend_predictions, 
                  metric == "bodymass_spectra",
                  non_zero == TRUE),
    aes(x, predicted, color = season),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
  scale_y_continuous() +
  facet_grid(survey_area ~ metric*community) +
  labs(
    x = "Year",
    y = "Exponent of Size Spectra")
 


 
(wigley_trends_1g | spectra_trends_16g) + plot_layout(guides = "collect") & theme(legend.position = "bottom")

Code
# Prepare inputs directly:
yr_op <- c(2000)
reg   <- "GoM"
seas  <- "Spring"


# Data for the spectra
actual_vals_1g <- wigley_in %>%
  filter(
    est_year %in% yr_op,
    survey_area == reg,
    season == seas,
    wmin_g >= 1)

actual_vals_16g <- wigley_in %>%
  filter(
    est_year %in% yr_op,
    survey_area == reg,
    season == seas,
    wmin_g >= 16)

Sensitivity to minimum size

Even when shifting the minimum size, its not clear that the size distribution now follows the pareto distribution more closely. Its easier to quickly look at the binned versions, but then we’re hopping across methods again.

Code
# Load processing functions
source(here::here("Code/R/Processing_Functions.R"))


# group_binspecies_spectra uses sizeSpectra::negLL.PLB.bins.species


# Get the slope over those years
mle_results_1g <- group_binspecies_spectra(
    ss_input = actual_vals_1g,
    grouping_vars = c("est_year", "season", "survey_area"),
    abundance_vals = "numlen_adj",
    length_vals = "length_cm",
    use_weight = FALSE,
    isd_xmin = 1,
    global_min = TRUE,
    isd_xmax = NULL,
    global_max = FALSE,
    bin_width = 1,
    vdiff = 2)



# and again for 16g
mle_results_16g <- group_binspecies_spectra(
    ss_input = actual_vals_16g,
    grouping_vars = c("est_year", "season", "survey_area"),
    abundance_vals = "numlen_adj",
    length_vals = "length_cm",
    use_weight = FALSE,
    isd_xmin = 16,
    global_min = TRUE,
    isd_xmax = NULL,
    global_max = FALSE,
    bin_width = 1,
    vdiff = 2)
Code
#' @title Individual Size Distribution Plot Prep
#' 
#' @description Prepares wmin_grams data to be plotted with the MLE
#' size spectrum slope fit.
#'
#' @param biomass_data Data prepped for mle estimation, use prep_wmin_wmax
#' @param min_weight_g Minimum weight cutoff to use to match bounded pareto minimum
#'
#' @return
#' @export
#'
#' @examples
isd_plot_prep <- function(
    biomass_data = databin_split, 
    min_weight_g = 1){
  
  # Arrange by lower weight 
  biomass_data <- dplyr::arrange(biomass_data, desc(wmin_g)) %>% 
    select(wmin_g, wmax_g, numlen_adj)
  
  # truncate the data to in match the spectra we fit's xmin/xmax
  biomass_data <- biomass_data %>% 
    filter(wmin_g >= min_weight_g,
           wmin_g != 0,
           is.na(wmin_g) == FALSE,
           wmax_g != 0,
           is.na(wmax_g) == FALSE)
  
  # Get the total number of fish for the group
  total_abundance <- sum(biomass_data$numlen_adj)
  
  # Have to do not with dplyr:
  wmin.vec <- biomass_data$wmin_g      # Vector of lower weight bin limits
  wmax.vec <- biomass_data$wmax_g      # Vector of upper weight bin limits
  num.vec  <- biomass_data$numlen_adj  # Vector of corresponding counts for those bins
  
  # doing it with purr and pmap
  biomass_bins <- select(biomass_data, wmin_g, wmax_g) %>% 
    distinct(wmin_g, wmax_g)
  
  # Go row-by-row and get the cumulative total greater than each 
  # minimum weight bin discrete size bin
  out_data <- pmap_dfr(biomass_bins, function(wmin_g, wmax_g){
    
    # 1. Count times wmin.vec >= individual wmin_g, multiply by number of fish for year
    # cumulative totals of sizes larger than the left lim
    countGTEwmin <- sum( (wmin.vec >= wmin_g) * num.vec)
    
    # 2. Count times wmin.vec >= individual wmax_g, multiply by number of fish for year
    # cumulative totals of sizes larger than the right lim
    lowCount <- sum( (wmin.vec >= wmax_g) * num.vec)
    
    # 3. Count times wmax.vec > individual wmin_g, multiply by number of fish for year
    highCount <- sum( (wmax.vec >  wmin_g) * num.vec)
    
    # 4. build table
    out_table <- data.frame(
      "wmin_g"       = wmin_g,
      "wmax_g"       = wmax_g,
      "countGTEwmin" = countGTEwmin,
      "lowCount"     = lowCount,
      "highCount"    = highCount
    )
  })
  
  
  
  # return the purr version
  return(out_data)
  
  
  
  
}
Code
# Control options
mle_results <- mle_results_1g
actual_vals <- actual_vals_1g
min_used <- 1




#### -- Plpot setup below  ---


# Power law parameters and summary details for the group of data:
b.MLE           <- mle_results$b
total_abundance <- mle_results$n
b.confMin       <- mle_results$confMin
b.confMax       <- mle_results$confMax

# Create range of x values from the group to get power law predictions
# min and max weights for power law
xmin  <- min_used
xmax  <- max(actual_vals$wmin_g)

# Create x values (individual bodymass) to predict across
# break up the Xlim into pieces between min and max
x.PLB <- seq(
  from = xmin,
  to   = xmax,
  by = 0.1) 

# get the length of that vector
x.PLB.length <- length(x.PLB) 

# Y values for plot limits/bounds/predictions from bounded power law pdf
y.PLB         <- (1 - sizeSpectra::pPLB(
  x = x.PLB, 
  b = b.MLE, 
  xmin = min(x.PLB), 
  xmax = max(x.PLB))) * total_abundance

y.PLB.confMin <- (1 - sizeSpectra::pPLB(
  x = x.PLB, 
  b = b.confMin, 
  xmin = min(x.PLB), 
  xmax = max(x.PLB))) * total_abundance

y.PLB.confMax <- (1 - sizeSpectra::pPLB(
  x = x.PLB, 
  b = b.confMax, 
  xmin = min(x.PLB), 
  xmax = max(x.PLB))) * total_abundance

# Put it in a df to make it easier
PLB_df <- data.frame(
  x.PLB   = x.PLB,
  y.PLB   = y.PLB,
  confMin = y.PLB.confMin,
  confMax = y.PLB.confMax) %>%
  mutate()


# 2. solve the power law function again in mutate for residuals
# need to do it twice for wmin_g and wmax_g
# Aggregate across overlapping size ranges
p_prepped <- actual_vals %>%
  isd_plot_prep(min_weight_g = min_used) %>%
  left_join(actual_vals) %>%
  select(comname, wmin_g, wmax_g, lowCount, highCount, countGTEwmin) %>%
  # Get the residuals
  mutate(
    # Estimated Abundance
    wmin_estimate = (1 - sizeSpectra::pPLB(x = wmin_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    wmax_estimate = (1 - sizeSpectra::pPLB(x = wmax_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    # Residuals
    b_resid = countGTEwmin - ((wmin_estimate + wmax_estimate)/2))



#####  Plotting Raw  ####

# Messing with the plot axes
fit_1g_gom_2000 <- ggplot() +
  # geom_ribbon(
  #   data = PLB_df, aes(x.PLB, ymin =confMin, ymax = confMax),
  #   fill = gmri_cols("orange"), 
  #   alpha = 0.4) +
  geom_line(
    data = PLB_df, aes(x.PLB, y.PLB),
    color = gmri_cols("orange"), 
    linewidth = 1) +
    geom_point(
      data = p_prepped %>% filter(lowCount >0),
      aes(x = (wmin_g+wmax_g)/2,
          y = countGTEwmin),
    color = gmri_cols("gmri blue"),
    alpha = 0.15,
    shape = 1) +
  scale_y_log10(
    labels = trans_format("log10", math_format(10^.x)), 
    limits = c(10^-1, 10^6)) +
  scale_x_log10(
    labels = trans_format("log10", math_format(10^.x)), 
    limits = c(10^-1, 10^6)) +
  labs(x = "Weight (g)", 
       y = "Abundance", 
       title = "Wigley Species - GOM - Spring - 2000",
       subtitle = "1g Minimum Size")

fit_1g_gom_2000 

Code
# Control options
mle_results <- mle_results_16g
actual_vals <- actual_vals_16g
min_used <- 16




#### -- Plpot setup below  ---

# Power law parameters and summary details for the group of data:
b.MLE           <- mle_results$b
total_abundance <- mle_results$n
b.confMin       <- mle_results$confMin
b.confMax       <- mle_results$confMax



# Create range of x values from the group to get power law predictions
# min and max weights for power law
xmin  <- min_used
xmax  <- max(actual_vals$wmin_g)

# Create x values (individual bodymass) to predict across
# break up the Xlim into pieces between min and max
x.PLB <- seq(
  from = xmin,
  to   = xmax,
  by = 0.1) 

# get the length of that vector
x.PLB.length <- length(x.PLB) 

# Y values for plot limits/bounds/predictions from bounded power law pdf
y.PLB         <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.MLE, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMin <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMin, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMax <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMax, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance

# Put it in a df to make it easier
PLB_df <- data.frame(
  x.PLB   = x.PLB,
  y.PLB   = y.PLB,
  confMin = y.PLB.confMin,
  confMax = y.PLB.confMax) %>%
  mutate()


# 2. solve the power law function again in mutate for residuals
# need to do it twice for wmin_g and wmax_g
# Aggregate across overlapping size ranges
p_prepped <- actual_vals %>%
  isd_plot_prep(min_weight_g = min_used) %>%
  left_join(actual_vals) %>%
  select(comname, wmin_g, wmax_g, lowCount, highCount, countGTEwmin) %>%
  # Get the residuals
  mutate(
    # Estimated Abundance
    wmin_estimate = (1 - sizeSpectra::pPLB(x = wmin_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    wmax_estimate = (1 - sizeSpectra::pPLB(x = wmax_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    # Residuals
    b_resid = countGTEwmin - ((wmin_estimate + wmax_estimate)/2))



#####  Plotting Raw  ####

# Messing with the plot axes
fit_16g_gom_2000 <- ggplot() +
  geom_line(
    data = PLB_df, aes(x.PLB, y.PLB),
    color = gmri_cols("orange"), 
    linewidth = 1) +
    geom_point(
      data = p_prepped %>% filter(lowCount >0),
      aes(x = (wmin_g+wmax_g)/2,
          y = countGTEwmin),
    color = gmri_cols("gmri blue"),
    alpha = 0.15,
    shape = 1) +
  scale_y_log10(
    labels = trans_format("log10", math_format(10^.x)), 
    limits = c(10^-1, 10^6)) +
  scale_x_log10(
    labels = trans_format("log10", math_format(10^.x)), 
    limits = c(10^-1, 10^6)) +
  labs(x = "Weight (g)", 
       y = "Abundance", 
       title = "Wigley Species - GOM - Spring - 2000",
       subtitle = "16g Minimum Size")

fit_16g_gom_2000 

Sensitivity to max size

Code
# Get the slope over those years
mle_results_1g <- group_binspecies_spectra(
    ss_input = actual_vals_1g,
    grouping_vars = c("est_year", "season", "survey_area"),
    abundance_vals = "numlen_adj",
    length_vals = "length_cm",
    use_weight = FALSE,
    isd_xmin = 1,
    global_min = TRUE,
    isd_xmax = 10^5,
    global_max = TRUE,
    bin_width = 1,
    vdiff = 2)



# and again for 16g
mle_results_16g <- group_binspecies_spectra(
    ss_input = actual_vals_16g,
    grouping_vars = c("est_year", "season", "survey_area"),
    abundance_vals = "numlen_adj",
    length_vals = "length_cm",
    use_weight = FALSE,
    isd_xmin = 16,
    global_min = TRUE,
    isd_xmax = 10^5,
    global_max = TRUE,
    bin_width = 1,
    vdiff = 2)
Code
# Control options
mle_results <- mle_results_1g
actual_vals <- actual_vals_1g
min_used <- 1
max_used <- 10^5




#### -- Plpot setup below  ---


# Power law parameters and summary details for the group of data:
b.MLE           <- mle_results$b
total_abundance <- mle_results$n
b.confMin       <- mle_results$confMin
b.confMax       <- mle_results$confMax

# Create range of x values from the group to get power law predictions
# min and max weights for power law
xmin  <- min_used
xmax  <- max(actual_vals$wmin_g)

# Create x values (individual bodymass) to predict across
# break up the Xlim into pieces between min and max
x.PLB <- seq(
  from = xmin,
  to   = xmax,
  by = 0.1) 

# get the length of that vector
x.PLB.length <- length(x.PLB) 

# Y values for plot limits/bounds/predictions from bounded power law pdf
y.PLB         <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.MLE, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMin <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMin, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMax <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMax, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance

# Put it in a df to make it easier
PLB_df <- data.frame(
  x.PLB   = x.PLB,
  y.PLB   = y.PLB,
  confMin = y.PLB.confMin,
  confMax = y.PLB.confMax) %>%
  mutate()


# 2. solve the power law function again in mutate for residuals
# need to do it twice for wmin_g and wmax_g
# Aggregate across overlapping size ranges
p_prepped <- actual_vals %>%
  isd_plot_prep(min_weight_g = min_used) %>%
  left_join(actual_vals) %>%
  select(comname, wmin_g, wmax_g, lowCount, highCount, countGTEwmin) %>%
  # Get the residuals
  mutate(
    # Estimated Abundance
    wmin_estimate = (1 - sizeSpectra::pPLB(x = wmin_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    wmax_estimate = (1 - sizeSpectra::pPLB(x = wmax_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    # Residuals
    b_resid = countGTEwmin - ((wmin_estimate + wmax_estimate)/2))



#####  Plotting Raw  ####

# Messing with the plot axes
fit_1g_gom_2000 <- ggplot() +
  geom_line(
    data = PLB_df, aes(x.PLB, y.PLB),
    color = gmri_cols("orange"), 
    linewidth = 1) +
    geom_point(
      data = p_prepped %>% filter(lowCount >0),
      aes(x = (wmin_g+wmax_g)/2,
          y = countGTEwmin),
    color = gmri_cols("gmri blue"),
    alpha = 0.15,
    shape = 1) +
  scale_y_log10(
    labels = trans_format("log10", math_format(10^.x)), 
    limits = c(10^-1, 10^6)) +
  scale_x_log10(
    labels = trans_format("log10", math_format(10^.x)), 
    limits = c(10^-1, 10^6)) +
  labs(x = "Weight (g)", 
       y = "Abundance", 
       title = "Wigley Species - GOM - Spring - 2000",
       subtitle = "1g Minimum Size, 10^5 max size limit")

fit_1g_gom_2000

Code
# Control options
mle_results <- mle_results_16g
actual_vals <- actual_vals_16g
min_used <- 16
max_used <- 10^5




#### -- Plpot setup below  ---

# Power law parameters and summary details for the group of data:
b.MLE           <- mle_results$b
total_abundance <- mle_results$n
b.confMin       <- mle_results$confMin
b.confMax       <- mle_results$confMax



# Create range of x values from the group to get power law predictions
# min and max weights for power law
xmin  <- min_used
xmax  <- max_used

# Create x values (individual bodymass) to predict across
# break up the Xlim into pieces between min and max
x.PLB <- seq(
  from = xmin,
  to   = xmax,
  by = 0.1) 

# get the length of that vector
x.PLB.length <- length(x.PLB) 

# Y values for plot limits/bounds/predictions from bounded power law pdf
y.PLB         <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.MLE, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMin <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMin, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMax <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMax, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance

# Put it in a df to make it easier
PLB_df <- data.frame(
  x.PLB   = x.PLB,
  y.PLB   = y.PLB,
  confMin = y.PLB.confMin,
  confMax = y.PLB.confMax) %>%
  mutate()


# 2. solve the power law function again in mutate for residuals
# need to do it twice for wmin_g and wmax_g
# Aggregate across overlapping size ranges
p_prepped <- actual_vals %>%
  isd_plot_prep(min_weight_g = min_used) %>%
  left_join(actual_vals) %>%
  select(comname, wmin_g, wmax_g, lowCount, highCount, countGTEwmin) %>%
  # Get the residuals
  mutate(
    # Estimated Abundance
    wmin_estimate = (1 - sizeSpectra::pPLB(x = wmin_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    wmax_estimate = (1 - sizeSpectra::pPLB(x = wmax_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    # Residuals
    b_resid = countGTEwmin - ((wmin_estimate + wmax_estimate)/2))



#####  Plotting Raw  ####

# Messing with the plot axes
fit_16g_gom_2000 <- ggplot() +
  geom_line(
    data = PLB_df, aes(x.PLB, y.PLB),
    color = gmri_cols("orange"), 
    linewidth = 1) +
    geom_point(
      data = p_prepped %>% filter(lowCount >0),
      aes(x = (wmin_g+wmax_g)/2,
          y = countGTEwmin),
    color = gmri_cols("gmri blue"),
    alpha = 0.15,
    shape = 1) +
  scale_y_log10(
    labels = trans_format("log10", math_format(10^.x)), 
    limits = c(10^-1, 10^6)) +
  scale_x_log10(
    labels = trans_format("log10", math_format(10^.x)), 
    limits = c(10^-1, 10^6)) +
  labs(x = "Weight (g)", 
       y = "Abundance", 
       title = "Wigley Species - GOM - Spring - 2000",
       subtitle = "16g Minimum Size, 10^5 max size limit")
fit_16g_gom_2000